/***
This do-file plots changes in employment from Jan to Jul 2020 in the tracker 
series against the same change in the QCEW series.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Employment"

* Create required subfolders
cap mkdir "${root}/data/derived/QCEW"
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

*-------------------------------------------------------------------------------
**# 1. Clean CZ covariates 
*-------------------------------------------------------------------------------

project, uses("${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta")
project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta")
use "${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta", clear
rename countyfips county_fips 
merge 1:1 county_fips using "${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta", nogen keep(1 3) keepusing(pop_2014_2018_est)
rename (county_fips pop_2014_2018_est)(countyfips pop_2018)
gcollapse (sum) cz_pop = pop_2018, by(cz czname)
gsort -cz_pop
gen pop_rank = _n 
tempfile ranks
save `ranks'

*-------------------------------------------------------------------------------
**# 2. Clean QCEW 
*-------------------------------------------------------------------------------

**# 2.1. Base employment at county level 

* Load QCEW employment counts
project, uses("${root}/data/derived/QCEW/QCEW by county x industry.dta")
use "${root}/data/derived/QCEW/QCEW by county x industry.dta", clear 

keep if year == 2020 & month == 1
gcollapse (sum) base_qcew = qcew_employment, by(countyfips)

tempfile qcew_county_jan 
save `qcew_county_jan'

**# 2.2. CZ level 

* Load QCEW employment counts
project, uses("${root}/data/derived/QCEW/QCEW by county x industry.dta")
use "${root}/data/derived/QCEW/QCEW by county x industry.dta", clear 

drop if inlist(industry_code, "99")
gen statefips = floor(countyfips / 1000)
drop if inlist(statefips, 72, 78) 

* Get 50 largest CZs 
project, uses("${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta")
merge m:1 countyfips using "${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta", nogen keep(1 3) keepusing(cz)
merge m:1 cz using `ranks', keep(1 3) nogen
keep if pop_rank <= 50

* Collapse to CZ level
gcollapse (sum) qcew_employment, by(month year cz czname)

* Norm
cap drop temp
gen temp = qcew_employment if month == 1 & year == 2020
gegen base_qcew = mean(temp), by(cz)
gen norm_qcew = 100 * (qcew_employment / base_qcew - 1)

		
tempfile qcew
save `qcew'

*-------------------------------------------------------------------------------
**# 3. Clean Paychex 
*-------------------------------------------------------------------------------

* Merge Final Tracker daily data but convert to months using QCEW base 
project, uses("${root}/data/web/data/Employment - County - Weekly.csv")
import delimited using "${root}/data/web/data/Employment - County - Weekly.csv", clear

* Collapse to month level 
gcollapse emp, by(year month countyfips)

* Impute emp counts 
merge m:1 countyfips using `qcew_county_jan', nogen keep(1 3)  
gen emp_count = (emp + 1) * base_qcew

* Get 50 largest CZs 
project, uses("${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta")
merge m:1 countyfips using "${root}/data/dvc/Opportunity Atlas/county_covars_atlas.dta", nogen keep(1 3) keepusing(cz)
merge m:1 cz using `ranks', keep(1 3) nogen
keep if pop_rank <= 50

* Collapse to CZ level 
gcollapse (sum) emp_count, by(year month cz czname)

* Norm
cap drop temp
gen temp = emp_count if month == 1 & year == 2020
gegen base = mean(temp), by(cz)
gen norm_emp_qcew_base = 100 * (emp_count / base - 1)
	
tempfile tracker_qtr
save `tracker_qtr'

*-------------------------------------------------------------------------------
**# 4. Scatterplot
*-------------------------------------------------------------------------------

use `qcew', clear 
merge 1:1 cz month year using `tracker_qtr', keep(3) nogen
merge m:1 cz using `ranks', keep(1 3) nogen

* We're interested in changes to July 2020 
keep if year == 2020 & month == 7

* Corr
corr norm_qcew norm_emp_qcew_base [w = cz_pop]
local w_corr: di %3.2f r(rho)

* RMSE
gen squared_error = (norm_qcew - norm_emp_qcew_base) ^ 2
sum squared_error [w = cz_pop] 
local w_rmse: di %4.2f sqrt(r(mean))

tw (lfit norm_emp_qcew_base norm_qcew [w = cz_pop], color(oi2)) ///
(scatter norm_emp_qcew_base norm_qcew, mlabel(czname) msymbol(i) mlabcolor(oi1) mlabpos(0) mlabcolor(oi1)) ,  ///
ytitle("Paychex-Intuit Change in Employment (%)" "from January to July 2020") ///
legend(off) ///
xlab(0 "0%" -5 "-5%" -10 "-10%" -15 "-15%" -20 "-20%", nogrid) ///
xtitle("QCEW Change in Employment (%)" "from January to July 2020") ///
text(-19 -3 "Correlation: `w_corr'" "RMSE: `w_rmse' p.p.", color(oi2) placement(east) justification(left) size(small)) ///
ylabel(0 "0%" -5 "-5%" -10 "-10%" -15 "-15%" -20 "-20%", nogrid)
					
oi_graph_export "${root}/results/Employment/scatter qcew tracker by cz july 2020", type(${fig_type})
	
*-------------------------------------------------------------------------------
* Output numbers for paper
*-------------------------------------------------------------------------------

cap erase "${root}/results/paper numbers/`category'/Change in Employment Rates to July by CZ - Paychex-Intuit-Earnin vs QCEW.yaml"

* Export output numbers to csv file
yamlout using "${root}/results/paper numbers/`category'/Change in Employment Rates to July by CZ - Paychex-Intuit-Earnin vs QCEW.yaml", ///
	key("emp_qcew_county_corr") ///
	comment("Correlation") ///
	value(`w_corr') fmt(%9.2f)
	
yamlout using "${root}/results/paper numbers/`category'/Change in Employment Rates to July by CZ - Paychex-Intuit-Earnin vs QCEW.yaml", ///
	key("emp_qcew_county_corr") ///
	comment("RMSE") ///
	value(`w_rmse') fmt(%9.2f)

project, creates("${root}/results/paper numbers/`category'/Change in Employment Rates to July by CZ - Paychex-Intuit-Earnin vs QCEW.yaml")
